This pipeline is mainly used “Giotto: a toolbox for integrative analysis and visualization of spatial expression data” published on ~Genome Biology~ and its Github guideline.
~Spatial transcriptomic~ and proteomic technologies have provided new opportunities to investigate cells in their native microenvironment. Here we present Giotto, a comprehensive and open-source toolbox for spatial data analysis and visualization. The analysis module provides end-to-end analysis by implementing a wide range of algorithms for characterizing tissue composition, spatial expression patterns, and cellular interactions. Furthermore, single-cell RNAseq data can be integrated for spatial cell-type enrichment analysis. The visualization module allows users to interactively visualize analysis outputs and imaging features. To demonstrate its general applicability, we apply Giotto to a wide range of datasets encompassing diverse technologies and platforms. In this pipeline, we will focus on exploring ‘mini seqFISH’ datasets.
A matrix of gene expression per spot
A matirx of spot coordination
image (optional)
#
library(Giotto)
# to automatically save figures in save_dir set save_plot to TRUE
temp_dir = getwd()
temp_dir = "~/Temp/"
myinstructions = createGiottoInstructions(save_dir = temp_dir, save_plot = FALSE, show_plot = F)
##
## no external python path or giotto environment was found, will use default python path if available
##
## 1. use installGiottoEnvironment() to install a local giotto python environment which automatically installs all modules
##
## 2. provide an existing python path to python_path to use your own python path which has all modules installed
minimum requirements:
- matrix with expression information (or path to)
- x,y(,z) coordinates for cells or spots (or path to)
my_working_dir = "/data"
# getSpatialDataset(dataset = 'seqfish_SS_cortex', directory = my_working_dir)
# giotto object
expr_path = "/data/data/seqfish_field_expr.txt.gz"
loc_path = "/data/data/seqfish_field_locs.txt"
seqfish_mini <- createGiottoObject(raw_exprs = expr_path, spatial_locs = loc_path)
## Consider to install these (optional) packages to run all possible Giotto commands: RTriangle FactoMiner
## Giotto does not automatically install all these packages as they are not absolutely required and this reduces the number of dependencies
## no external python path or giotto environment was found, will use default python path if available
##
## 1. use installGiottoEnvironment() to install a local giotto python environment which automatically installs all modules
##
## 2. provide an existing python path to python_path to use your own python path which has all modules installed
How to work with Giotto instructions that are part of your Giotto object:
- show the instructions associated with your Giotto object with showGiottoInstructions
- change one or more instructions with changeGiottoInstructions
- replace all instructions at once with replaceGiottoInstructions
- read or get a specific giotto instruction with readGiottoInstructions
Of note, the python path can only be set once in an R session. See the reticulate package for more information.
# show instructions associated with giotto object (seqfish_mini)
showGiottoInstructions(seqfish_mini)
## $python_path
## [1] "/usr/bin/python3"
##
## $show_plot
## [1] TRUE
##
## $return_plot
## [1] TRUE
##
## $save_plot
## [1] FALSE
##
## $save_dir
## [1] "/data"
##
## $plot_format
## [1] "png"
##
## $dpi
## [1] 300
##
## $units
## [1] "in"
##
## $height
## [1] 9
##
## $width
## [1] 9
##
## $is_docker
## [1] FALSE
seqfish_mini <- filterGiotto(gobject = seqfish_mini, expression_threshold = 0.5, gene_det_in_min_cells = 20,
min_det_genes_per_cell = 0)
seqfish_mini <- normalizeGiotto(gobject = seqfish_mini, scalefactor = 6000, verbose = T)
##
## first scale genes and then cells
seqfish_mini <- addStatistics(gobject = seqfish_mini)
seqfish_mini <- adjustGiottoMatrix(gobject = seqfish_mini, expression_values = c("normalized"),
covariate_columns = c("nr_genes", "total_expr"))
seqfish_mini <- calculateHVG(gobject = seqfish_mini)
## return_plot = TRUE and return_gobject = TRUE
##
## plot will not be returned to object, but can still be saved with save_plot = TRUE or manually
seqfish_mini <- runPCA(gobject = seqfish_mini)
## hvg was found in the gene metadata information and will be used to select highly variable genes
screePlot(seqfish_mini, ncp = 20)
## PCA with name: pca already exists and will be used for the screeplot
jackstrawPlot(seqfish_mini, ncp = 20)
## 1 2 3 4 5 6 7 8 9 10 number of estimated significant components: 5
plotPCA(seqfish_mini)
seqfish_mini <- runUMAP(seqfish_mini, dimensions_to_use = 1:5, n_threads = 2)
plotUMAP(gobject = seqfish_mini)
seqfish_mini <- runtSNE(seqfish_mini, dimensions_to_use = 1:5)
plotTSNE(gobject = seqfish_mini)
seqfish_mini <- createNearestNetwork(gobject = seqfish_mini, dimensions_to_use = 1:5, k = 5)
seqfish_mini <- doLeidenCluster(gobject = seqfish_mini, resolution = 0.4, n_iterations = 1000)
# visualize UMAP cluster results
plotUMAP(gobject = seqfish_mini, cell_color = "leiden_clus", show_NN_network = T, point_size = 2.5)
# visualize UMAP and spatial results
spatDimPlot(gobject = seqfish_mini, cell_color = "leiden_clus", spat_point_shape = "voronoi")
# heatmap and dendrogram
showClusterHeatmap(gobject = seqfish_mini, cluster_column = "leiden_clus")
showClusterDendrogram(seqfish_mini, h = 0.5, rotate = T, cluster_column = "leiden_clus")
gini_markers = findMarkers_one_vs_all(gobject = seqfish_mini, method = "gini", expression_values = "normalized",
cluster_column = "leiden_clus", min_genes = 20, min_expr_gini_score = 0.5, min_det_gini_score = 0.5)
##
## start with cluster 1
##
## start with cluster 2
##
## start with cluster 3
##
## start with cluster 4
##
## start with cluster 5
##
## start with cluster 6
##
## start with cluster 7
##
## start with cluster 8
# get top 2 genes per cluster and visualize with violinplot
topgenes_gini = gini_markers[, head(.SD, 2), by = "cluster"]
violinPlot(seqfish_mini, genes = topgenes_gini$genes, cluster_column = "leiden_clus")
# get top 6 genes per cluster and visualize with heatmap
topgenes_gini2 = gini_markers[, head(.SD, 6), by = "cluster"]
plotMetaDataHeatmap(seqfish_mini, selected_genes = topgenes_gini2$genes, metadata_cols = c("leiden_clus"))
clusters_cell_types = c("cell A", "cell B", "cell C", "cell D", "cell E", "cell F", "cell G", "cell H")
names(clusters_cell_types) = 1:8
seqfish_mini = annotateGiotto(gobject = seqfish_mini, annotation_vector = clusters_cell_types, cluster_column = "leiden_clus",
name = "cell_types")
# check new cell metadata
pDataDT(seqfish_mini)
# visualize annotations
spatDimPlot(gobject = seqfish_mini, cell_color = "cell_types", spat_point_size = 3, dim_point_size = 3)
Create a grid based on defined stepsizes in the x,y(,z) axes.
seqfish_mini <- createSpatialGrid(gobject = seqfish_mini, sdimx_stepsize = 300, sdimy_stepsize = 300,
minimum_padding = 50)
showGrids(seqfish_mini)
## The following grids are available: spatial_grid
## [1] "spatial_grid"
# visualize grid
spatPlot(gobject = seqfish_mini, show_grid = T, point_size = 1.5)
plotStatDelaunayNetwork(gobject = seqfish_mini, maximum_distance = 400)
seqfish_mini = createSpatialNetwork(gobject = seqfish_mini, minimum_k = 2, maximum_distance_delaunay = 400)
seqfish_mini = createSpatialNetwork(gobject = seqfish_mini, minimum_k = 2, method = "kNN", k = 10)
showNetworks(seqfish_mini)
## The following images are available: Delaunay_network kNN_network
## [1] "Delaunay_network" "kNN_network"
# visualize the two different spatial networks
spatPlot(gobject = seqfish_mini, show_network = T, network_color = "blue", spatial_network_name = "Delaunay_network",
point_size = 2.5, cell_color = "leiden_clus")
spatPlot(gobject = seqfish_mini, show_network = T, network_color = "blue", spatial_network_name = "kNN_network",
point_size = 2.5, cell_color = "leiden_clus")
Identify spatial genes with 3 different methods:
- binSpect with kmeans binarization (default)
- binSpect with rank binarization
- silhouetteRank
Visualize top 4 genes per method.
km_spatialgenes = binSpect(seqfish_mini)
##
## This is the single parameter version of binSpect
## 1. matrix binarization complete
##
## 2. spatial enrichment test completed
##
## 3. (optional) average expression of high expressing cells calculated
##
## 4. (optional) number of high expressing cells calculated
spatGenePlot(seqfish_mini, expression_values = "scaled", genes = km_spatialgenes[1:4]$genes, point_shape = "border",
point_border_stroke = 0.1, show_network = F, network_color = "lightgrey", point_size = 2.5,
cow_n_col = 2)
rank_spatialgenes = binSpect(seqfish_mini, bin_method = "rank")
##
## This is the single parameter version of binSpect
## 1. matrix binarization complete
##
## 2. spatial enrichment test completed
##
## 3. (optional) average expression of high expressing cells calculated
##
## 4. (optional) number of high expressing cells calculated
spatGenePlot(seqfish_mini, expression_values = "scaled", genes = rank_spatialgenes[1:4]$genes, point_shape = "border",
point_border_stroke = 0.1, show_network = F, network_color = "lightgrey", point_size = 2.5,
cow_n_col = 2)
silh_spatialgenes = silhouetteRank(gobject = seqfish_mini) # TODO: suppress print output
spatGenePlot(seqfish_mini, expression_values = "scaled", genes = silh_spatialgenes[1:4]$genes, point_shape = "border",
point_border_stroke = 0.1, show_network = F, network_color = "lightgrey", point_size = 2.5,
cow_n_col = 2)
Identify robust spatial co-expression patterns using the spatial network or grid and a subset of individual spatial genes.
1. calculate spatial correlation scores
2. cluster correlation scores
# 1. calculate spatial correlation scores
ext_spatial_genes = km_spatialgenes[1:500]$genes
spat_cor_netw_DT = detectSpatialCorGenes(seqfish_mini, method = "network", spatial_network_name = "Delaunay_network",
subset_genes = ext_spatial_genes)
# 2. cluster correlation scores
spat_cor_netw_DT = clusterSpatialCorGenes(spat_cor_netw_DT, name = "spat_netw_clus", k = 8)
heatmSpatialCorGenes(seqfish_mini, spatCorObject = spat_cor_netw_DT, use_clus_name = "spat_netw_clus")
netw_ranks = rankSpatialCorGroups(seqfish_mini, spatCorObject = spat_cor_netw_DT, use_clus_name = "spat_netw_clus")
top_netw_spat_cluster = showSpatialCorGenes(spat_cor_netw_DT, use_clus_name = "spat_netw_clus",
selected_clusters = 6, show_top_genes = 1)
cluster_genes_DT = showSpatialCorGenes(spat_cor_netw_DT, use_clus_name = "spat_netw_clus", show_top_genes = 1)
cluster_genes = cluster_genes_DT$clus
names(cluster_genes) = cluster_genes_DT$gene_ID
seqfish_mini = createMetagenes(seqfish_mini, gene_clusters = cluster_genes, name = "cluster_metagene")
spatCellPlot(seqfish_mini, spat_enr_names = "cluster_metagene", cell_annotation_values = netw_ranks$clusters,
point_size = 1.5, cow_n_col = 3)
hmrf_folder = paste0(temp_dir, "/", "11_HMRF/")
if (!file.exists(hmrf_folder)) dir.create(hmrf_folder, recursive = T)
# perform hmrf
my_spatial_genes = km_spatialgenes[1:100]$genes
HMRF_spatial_genes = doHMRF(gobject = seqfish_mini, expression_values = "scaled", spatial_genes = my_spatial_genes,
spatial_network_name = "Delaunay_network", k = 9, betas = c(28, 2, 2), output_folder = paste0(hmrf_folder,
"/", "Spatial_genes/SG_top100_k9_scaled"))
##
## expression_matrix.txt already exists at this location, will be overwritten
##
## spatial_genes.txt already exists at this location, will be overwritten
##
## spatial_network.txt already exists at this location, will be overwritten
##
## spatial_cell_locations.txt already exists at this location, will be overwritten
## [1] "/usr/bin/python3 /home/dean/R/x86_64-pc-linux-gnu-library/3.6/Giotto/python/reader2.py -l \"~/Temp//11_HMRF//Spatial_genes/SG_top100_k9_scaled/spatial_cell_locations.txt\" -g \"~/Temp//11_HMRF//Spatial_genes/SG_top100_k9_scaled/spatial_genes.txt\" -n \"~/Temp//11_HMRF//Spatial_genes/SG_top100_k9_scaled/spatial_network.txt\" -e \"~/Temp//11_HMRF//Spatial_genes/SG_top100_k9_scaled/expression_matrix.txt\" -o \"~/Temp//11_HMRF//Spatial_genes/SG_top100_k9_scaled/result.spatial.zscore\" -a test -k 9 -b 28 2 2 -t 1e-10 -z none -s 100 -i 100"
# check and select hmrf
for (i in seq(28, 30, by = 2)) {
viewHMRFresults2D(gobject = seqfish_mini, HMRFoutput = HMRF_spatial_genes, k = 9, betas_to_view = i,
point_size = 2)
}
## [1] "/usr/bin/python3 /home/dean/R/x86_64-pc-linux-gnu-library/3.6/Giotto/python/get_result2.py -r \"~/Temp//11_HMRF//Spatial_genes/SG_top100_k9_scaled/result.spatial.zscore\" -a test -k 9 -b 28"
## [1] "/usr/bin/python3 /home/dean/R/x86_64-pc-linux-gnu-library/3.6/Giotto/python/get_result2.py -r \"~/Temp//11_HMRF//Spatial_genes/SG_top100_k9_scaled/result.spatial.zscore\" -a test -k 9 -b 30"
seqfish_mini = addHMRF(gobject = seqfish_mini, HMRFoutput = HMRF_spatial_genes, k = 9, betas_to_add = c(28),
hmrf_name = "HMRF")
## [1] "/usr/bin/python3 /home/dean/R/x86_64-pc-linux-gnu-library/3.6/Giotto/python/get_result2.py -r \"~/Temp//11_HMRF//Spatial_genes/SG_top100_k9_scaled/result.spatial.zscore\" -a test -k 9 -b 28"
# visualize selected hmrf result
giotto_colors = Giotto:::getDistinctColors(9)
names(giotto_colors) = 1:9
spatPlot(gobject = seqfish_mini, cell_color = "HMRF_k9_b.28", point_size = 3, coord_fix_ratio = 1,
cell_color_code = giotto_colors)
edge_weight_range_enrichment = c(2,5))
## select top 25th highest expressing genes
gene_metadata = fDataDT(seqfish_mini)
plot(gene_metadata$nr_cells, gene_metadata$mean_expr)
plot(gene_metadata$nr_cells, gene_metadata$mean_expr_det)
quantile(gene_metadata$mean_expr_det)
## 0% 25% 50% 75% 100%
## 3.647796 3.831043 3.905460 3.987835 6.082388
high_expressed_genes = gene_metadata[mean_expr_det > 4]$gene_ID
## identify genes that are associated with proximity to other cell types
CPGscoresHighGenes = findCPG(gobject = seqfish_mini, selected_genes = high_expressed_genes, spatial_network_name = "Delaunay_network",
cluster_column = "cell_types", diff_test = "permutation", adjust_method = "fdr", nr_permutations = 500,
do_parallel = T, cores = 2)
## visualize all genes
plotCellProximityGenes(seqfish_mini, cpgObject = CPGscoresHighGenes, method = "dotplot")
## filter genes
CPGscoresFilt = filterCPG(CPGscoresHighGenes, min_cells = 2, min_int_cells = 2, min_fdr = 0.1, min_spat_diff = 0.1,
min_log2_fc = 0.1, min_zscore = 1)
## visualize subset of interaction changed genes (ICGs)
ICG_genes = c("Cpne2", "Scg3", "Cmtm3", "Cplx1", "Lingo1")
ICG_genes_types = c("cell E", "cell D", "cell D", "cell G", "cell E")
names(ICG_genes) = ICG_genes_types
plotICG(gobject = seqfish_mini, cpgObject = CPGscoresHighGenes, source_type = "cell A", source_markers = c("Csf1r",
"Laptm5"), ICG_genes = ICG_genes)
LR_data = data.table::fread(system.file("extdata", "mouse_ligand_receptors.txt", package = "Giotto"))
LR_data[, `:=`(ligand_det, ifelse(mouseLigand %in% seqfish_mini@gene_ID, T, F))]
LR_data[, `:=`(receptor_det, ifelse(mouseReceptor %in% seqfish_mini@gene_ID, T, F))]
LR_data_det = LR_data[ligand_det == T & receptor_det == T]
select_ligands = LR_data_det$mouseLigand
select_receptors = LR_data_det$mouseReceptor
## get statistical significance of gene pair expression changes based on expression ##
expr_only_scores = exprCellCellcom(gobject = seqfish_mini, cluster_column = "cell_types", random_iter = 500,
gene_set_1 = select_ligands, gene_set_2 = select_receptors)
## simulation 1
## simulation 2
## simulation 3
## simulation 4
## simulation 5
## simulation 6
## simulation 7
## simulation 8
## simulation 9
## simulation 10
## simulation 11
## simulation 12
## simulation 13
## simulation 14
## simulation 15
## simulation 16
## simulation 17
## simulation 18
## simulation 19
## simulation 20
## simulation 21
## simulation 22
## simulation 23
## simulation 24
## simulation 25
## simulation 26
## simulation 27
## simulation 28
## simulation 29
## simulation 30
## simulation 31
## simulation 32
## simulation 33
## simulation 34
## simulation 35
## simulation 36
## simulation 37
## simulation 38
## simulation 39
## simulation 40
## simulation 41
## simulation 42
## simulation 43
## simulation 44
## simulation 45
## simulation 46
## simulation 47
## simulation 48
## simulation 49
## simulation 50
## simulation 51
## simulation 52
## simulation 53
## simulation 54
## simulation 55
## simulation 56
## simulation 57
## simulation 58
## simulation 59
## simulation 60
## simulation 61
## simulation 62
## simulation 63
## simulation 64
## simulation 65
## simulation 66
## simulation 67
## simulation 68
## simulation 69
## simulation 70
## simulation 71
## simulation 72
## simulation 73
## simulation 74
## simulation 75
## simulation 76
## simulation 77
## simulation 78
## simulation 79
## simulation 80
## simulation 81
## simulation 82
## simulation 83
## simulation 84
## simulation 85
## simulation 86
## simulation 87
## simulation 88
## simulation 89
## simulation 90
## simulation 91
## simulation 92
## simulation 93
## simulation 94
## simulation 95
## simulation 96
## simulation 97
## simulation 98
## simulation 99
## simulation 100
## simulation 101
## simulation 102
## simulation 103
## simulation 104
## simulation 105
## simulation 106
## simulation 107
## simulation 108
## simulation 109
## simulation 110
## simulation 111
## simulation 112
## simulation 113
## simulation 114
## simulation 115
## simulation 116
## simulation 117
## simulation 118
## simulation 119
## simulation 120
## simulation 121
## simulation 122
## simulation 123
## simulation 124
## simulation 125
## simulation 126
## simulation 127
## simulation 128
## simulation 129
## simulation 130
## simulation 131
## simulation 132
## simulation 133
## simulation 134
## simulation 135
## simulation 136
## simulation 137
## simulation 138
## simulation 139
## simulation 140
## simulation 141
## simulation 142
## simulation 143
## simulation 144
## simulation 145
## simulation 146
## simulation 147
## simulation 148
## simulation 149
## simulation 150
## simulation 151
## simulation 152
## simulation 153
## simulation 154
## simulation 155
## simulation 156
## simulation 157
## simulation 158
## simulation 159
## simulation 160
## simulation 161
## simulation 162
## simulation 163
## simulation 164
## simulation 165
## simulation 166
## simulation 167
## simulation 168
## simulation 169
## simulation 170
## simulation 171
## simulation 172
## simulation 173
## simulation 174
## simulation 175
## simulation 176
## simulation 177
## simulation 178
## simulation 179
## simulation 180
## simulation 181
## simulation 182
## simulation 183
## simulation 184
## simulation 185
## simulation 186
## simulation 187
## simulation 188
## simulation 189
## simulation 190
## simulation 191
## simulation 192
## simulation 193
## simulation 194
## simulation 195
## simulation 196
## simulation 197
## simulation 198
## simulation 199
## simulation 200
## simulation 201
## simulation 202
## simulation 203
## simulation 204
## simulation 205
## simulation 206
## simulation 207
## simulation 208
## simulation 209
## simulation 210
## simulation 211
## simulation 212
## simulation 213
## simulation 214
## simulation 215
## simulation 216
## simulation 217
## simulation 218
## simulation 219
## simulation 220
## simulation 221
## simulation 222
## simulation 223
## simulation 224
## simulation 225
## simulation 226
## simulation 227
## simulation 228
## simulation 229
## simulation 230
## simulation 231
## simulation 232
## simulation 233
## simulation 234
## simulation 235
## simulation 236
## simulation 237
## simulation 238
## simulation 239
## simulation 240
## simulation 241
## simulation 242
## simulation 243
## simulation 244
## simulation 245
## simulation 246
## simulation 247
## simulation 248
## simulation 249
## simulation 250
## simulation 251
## simulation 252
## simulation 253
## simulation 254
## simulation 255
## simulation 256
## simulation 257
## simulation 258
## simulation 259
## simulation 260
## simulation 261
## simulation 262
## simulation 263
## simulation 264
## simulation 265
## simulation 266
## simulation 267
## simulation 268
## simulation 269
## simulation 270
## simulation 271
## simulation 272
## simulation 273
## simulation 274
## simulation 275
## simulation 276
## simulation 277
## simulation 278
## simulation 279
## simulation 280
## simulation 281
## simulation 282
## simulation 283
## simulation 284
## simulation 285
## simulation 286
## simulation 287
## simulation 288
## simulation 289
## simulation 290
## simulation 291
## simulation 292
## simulation 293
## simulation 294
## simulation 295
## simulation 296
## simulation 297
## simulation 298
## simulation 299
## simulation 300
## simulation 301
## simulation 302
## simulation 303
## simulation 304
## simulation 305
## simulation 306
## simulation 307
## simulation 308
## simulation 309
## simulation 310
## simulation 311
## simulation 312
## simulation 313
## simulation 314
## simulation 315
## simulation 316
## simulation 317
## simulation 318
## simulation 319
## simulation 320
## simulation 321
## simulation 322
## simulation 323
## simulation 324
## simulation 325
## simulation 326
## simulation 327
## simulation 328
## simulation 329
## simulation 330
## simulation 331
## simulation 332
## simulation 333
## simulation 334
## simulation 335
## simulation 336
## simulation 337
## simulation 338
## simulation 339
## simulation 340
## simulation 341
## simulation 342
## simulation 343
## simulation 344
## simulation 345
## simulation 346
## simulation 347
## simulation 348
## simulation 349
## simulation 350
## simulation 351
## simulation 352
## simulation 353
## simulation 354
## simulation 355
## simulation 356
## simulation 357
## simulation 358
## simulation 359
## simulation 360
## simulation 361
## simulation 362
## simulation 363
## simulation 364
## simulation 365
## simulation 366
## simulation 367
## simulation 368
## simulation 369
## simulation 370
## simulation 371
## simulation 372
## simulation 373
## simulation 374
## simulation 375
## simulation 376
## simulation 377
## simulation 378
## simulation 379
## simulation 380
## simulation 381
## simulation 382
## simulation 383
## simulation 384
## simulation 385
## simulation 386
## simulation 387
## simulation 388
## simulation 389
## simulation 390
## simulation 391
## simulation 392
## simulation 393
## simulation 394
## simulation 395
## simulation 396
## simulation 397
## simulation 398
## simulation 399
## simulation 400
## simulation 401
## simulation 402
## simulation 403
## simulation 404
## simulation 405
## simulation 406
## simulation 407
## simulation 408
## simulation 409
## simulation 410
## simulation 411
## simulation 412
## simulation 413
## simulation 414
## simulation 415
## simulation 416
## simulation 417
## simulation 418
## simulation 419
## simulation 420
## simulation 421
## simulation 422
## simulation 423
## simulation 424
## simulation 425
## simulation 426
## simulation 427
## simulation 428
## simulation 429
## simulation 430
## simulation 431
## simulation 432
## simulation 433
## simulation 434
## simulation 435
## simulation 436
## simulation 437
## simulation 438
## simulation 439
## simulation 440
## simulation 441
## simulation 442
## simulation 443
## simulation 444
## simulation 445
## simulation 446
## simulation 447
## simulation 448
## simulation 449
## simulation 450
## simulation 451
## simulation 452
## simulation 453
## simulation 454
## simulation 455
## simulation 456
## simulation 457
## simulation 458
## simulation 459
## simulation 460
## simulation 461
## simulation 462
## simulation 463
## simulation 464
## simulation 465
## simulation 466
## simulation 467
## simulation 468
## simulation 469
## simulation 470
## simulation 471
## simulation 472
## simulation 473
## simulation 474
## simulation 475
## simulation 476
## simulation 477
## simulation 478
## simulation 479
## simulation 480
## simulation 481
## simulation 482
## simulation 483
## simulation 484
## simulation 485
## simulation 486
## simulation 487
## simulation 488
## simulation 489
## simulation 490
## simulation 491
## simulation 492
## simulation 493
## simulation 494
## simulation 495
## simulation 496
## simulation 497
## simulation 498
## simulation 499
## simulation 500
## get statistical significance of gene pair expression changes upon cell-cell interaction
spatial_all_scores = spatCellCellcom(seqfish_mini, spatial_network_name = "Delaunay_network", cluster_column = "cell_types",
random_iter = 500, gene_set_1 = select_ligands, gene_set_2 = select_receptors, adjust_method = "fdr",
do_parallel = T, cores = 4, verbose = "none")
## * plot communication scores #### select top LR ##
selected_spat = spatial_all_scores[p.adj <= 0.5 & abs(log2fc) > 0.1 & lig_nr >= 2 & rec_nr >= 2]
data.table::setorder(selected_spat, -PI)
top_LR_ints = unique(selected_spat[order(-abs(PI))]$LR_comb)[1:33]
top_LR_cell_ints = unique(selected_spat[order(-abs(PI))]$LR_cell_comb)[1:33]
plotCCcomHeatmap(gobject = seqfish_mini, comScores = spatial_all_scores, selected_LR = top_LR_ints,
selected_cell_LR = top_LR_cell_ints, show = "LR_expr")
plotCCcomDotplot(gobject = seqfish_mini, comScores = spatial_all_scores, selected_LR = top_LR_ints,
selected_cell_LR = top_LR_cell_ints, cluster_on = "PI")
## * spatial vs rank ####
comb_comm = combCCcom(spatialCC = spatial_all_scores, exprCC = expr_only_scores)
# top differential activity levels for ligand receptor pairs
plotRankSpatvsExpr(gobject = seqfish_mini, comb_comm, expr_rnk_column = "exprPI_rnk", spat_rnk_column = "spatPI_rnk",
midpoint = 10)
## for top 1 expression ranks, you recover 1.79 % of the highest spatial rank
## for top 10 expression ranks, you recover 32.55 % of the highest spatial rank
## for top 20 expression ranks, you recover 60.3 % of the highest spatial rank
## * recovery #### predict maximum differential activity
plotRecovery(gobject = seqfish_mini, comb_comm, expr_rnk_column = "exprPI_rnk", spat_rnk_column = "spatPI_rnk",
ground_truth = "spatial")
## percentage explained = 0.5274725
Giotto supports multiple ways for searching for spatially variable genes. Currently we have incorporated SpatialDE, trendceek, Spark, as well as two methods that we have developed binSpect and silhouetteRank. The common goal is to score genes in the spatial transcriptomic dataset based on the extent to which individual genes’ expression values form a spatially coherent pattern (or whether there is a dependence of expression on spatial locations). The methods achieve this goal through various algorithms and statistical tests.
The method uses Gaussian process regression to decompose expression variability into a spatial covariance term and nonspatial variance term. The spatial covariance term assumes a linear trend and periodic pattern of gene expression variation. Multiple different spatial covariance functions are tested including: (1) null model, (2) general Gaussian covariance (squared exponential), (3) linear covariance, and (4) periodic covariance functions. A suitable model is selected using Bayes information criterion.
km_spatialgenes <- spatialDE(gobject = seqfish_mini, expression_values = c(‘raw’, ‘normalized’, ‘scaled’, ‘custom’), size = c(4,2,1), color = c(“blue”, “green”, “red”), sig_alpha = 0.5, unsig_alpha = 0.5, python_path = NULL, show_plot = NA, return_plot = NA, save_plot = NA, save_param = list(), default_save_name = ‘SpatialDE’) km_spatialgenes
The input is a gene expression matrix. There are 4 version of expression matrix (indicated by expression_values). Raw version (in counts) is recommended. SpatialDE performs library size normalization (by default) if raw expression is used. Otherwise, one can also use “normalized” and skip SpatialDE normalization step.
There are no other parameters required. The parameters color, sig_alpha, unsig_alpha are used for plotting the Fraction spatial variance vs Adj. P-value https://github.com/Teichlab/SpatialDE, and is optional. To disable this FSV vs. Adj P-value plot, show_plot is set to NA (default). The parameters return_plot, save_plot, save_param are for saving the results automatically to disk (default values are NA). They are attached to every function (see CreateGiottoInstructions()).
A data frame with the results. There are 3 fields reported per gene: LLR, pval, qval. LLR is log-likelihood of model, useful for creating a whole ranking of genes unambiguously. P-val, Q-val are useful for cut-off based approach to filtering the spatial genes.
library(Giotto)
path_to_matrix = system.file("extdata", "seqfish_field_expr.txt", package = "Giotto")
path_to_locations = system.file("extdata", "seqfish_field_locs.txt", package = "Giotto")
my_giotto_object = createGiottoObject(raw_exprs = path_to_matrix, spatial_locs = path_to_locations)
## Consider to install these (optional) packages to run all possible Giotto commands: RTriangle FactoMiner
## Giotto does not automatically install all these packages as they are not absolutely required and this reduces the number of dependencies
## no external python path or giotto environment was found, will use default python path if available
##
## 1. use installGiottoEnvironment() to install a local giotto python environment which automatically installs all modules
##
## 2. provide an existing python path to python_path to use your own python path which has all modules installed
# processing
my_giotto_object <- filterGiotto(gobject = seqfish_mini, expression_threshold = 0.5, gene_det_in_min_cells = 20,
min_det_genes_per_cell = 0)
my_giotto_object <- normalizeGiotto(gobject = my_giotto_object)
# dimension reduction
my_giotto_object <- calculateHVG(gobject = my_giotto_object)
## return_plot = TRUE and return_gobject = TRUE
##
## plot will not be returned to object, but can still be saved with save_plot = TRUE or manually
##
## hvg has already been used, will be overwritten
my_giotto_object <- runPCA(gobject = my_giotto_object)
## hvg was found in the gene metadata information and will be used to select highly variable genes
##
## pca has already been used, will be overwritten
my_giotto_object <- runUMAP(my_giotto_object, dimensions_to_use = 1:5)
##
## umap has already been used, will be overwritten
# leiden clustering
my_giotto_object = doLeidenCluster(my_giotto_object, name = "leiden_clus")
##
## leiden_clus has already been used, will be overwritten
# annotate
metadata = pDataDT(my_giotto_object)
uniq_clusters = length(unique(metadata$leiden_clus))
clusters_cell_types = paste0("cell ", LETTERS[1:uniq_clusters])
names(clusters_cell_types) = 1:uniq_clusters
my_giotto_object = annotateGiotto(gobject = my_giotto_object, annotation_vector = clusters_cell_types,
cluster_column = "leiden_clus", name = "cell_types")
##
## annotation name cell_types was already used
## and will be overwritten
# create network (required for binSpect methods)
my_giotto_object = createSpatialNetwork(gobject = my_giotto_object, minimum_k = 2)
##
## Delaunay_network has already been used, will be overwritten
# identify genes with a spatial coherent expression profile
km_spatialgenes = binSpect(my_giotto_object, bin_method = "kmeans")
##
## This is the single parameter version of binSpect
## 1. matrix binarization complete
##
## 2. spatial enrichment test completed
##
## 3. (optional) average expression of high expressing cells calculated
##
## 4. (optional) number of high expressing cells calculated
set.seed(seed = 2841)
cell_proximities = cellProximityEnrichment(gobject = my_giotto_object, cluster_column = "cell_types",
spatial_network_name = "Delaunay_network", adjust_method = "fdr", number_of_simulations = 1000)
# barplot
cellProximityBarplot(gobject = my_giotto_object, CPscore = cell_proximities, min_orig_ints = 3,
min_sim_ints = 3)
# heatmap
cellProximityHeatmap(gobject = my_giotto_object, CPscore = cell_proximities, order_cell_types = T,
scale = T, color_breaks = c(-1.5, 0, 1.5), color_names = c("blue", "white", "red"))
# network
cellProximityNetwork(gobject = my_giotto_object, CPscore = cell_proximities, remove_self_edges = T,
only_show_enrichment_edges = T)
# network with self-edges
cellProximityNetwork(gobject = my_giotto_object, CPscore = cell_proximities, remove_self_edges = F,
self_loop_strength = 0.3, only_show_enrichment_edges = F, rescale_edge_weights = T, node_size = 8,
edge_weight_range_depletion = c(1, 2), edge_weight_range_enrichment = c(2, 5))
# Option 1
spec_interaction = "cell D--cell F"
cellProximitySpatPlot2D(gobject = my_giotto_object, interaction_name = spec_interaction, show_network = T,
cluster_column = "cell_types", cell_color = "cell_types", cell_color_code = c(`cell D` = "lightblue",
`cell F` = "red"), point_size_select = 4, point_size_other = 2)
# Option 2: create additional metadata
my_giotto_object = addCellIntMetadata(my_giotto_object, spatial_network = "Delaunay_network", cluster_column = "cell_types",
cell_interaction = spec_interaction, name = "D_F_interactions")
spatPlot(my_giotto_object, cell_color = "D_F_interactions", legend_symbol_size = 3, select_cell_groups = c("other_cell D",
"other_cell F", "select_cell D", "select_cell F"))